#Load Libraries & Set Environment
library(ggplot2)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(caret)
## Loading required package: lattice
library(MASS)
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ✓ purrr 0.3.4
## ── Conflicts ─────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x purrr::lift() masks caret::lift()
## x dplyr::select() masks MASS::select()
library(e1071)
library(mvtnorm)
library(class)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:purrr':
##
## compact
library(dplyr)
# Strongly prefer fixed over scientific notation when printing numbers.
options(scipen = 999)

# Read the case-study data. The commented line is the interactive alternative
# that prompts for the file instead of using the fixed path.
#attrition <- read.csv(file.choose())
attrition <- read.csv("/Users/allenmiller/OneDrive - Southern Methodist University/Doing Data Science/CaseStudy2/CaseStudy2-data.csv")

# Columns that are converted to factors: the text-valued fields plus the
# integer-coded level/rating fields.
factorCols <- c(
  "Attrition", "BusinessTravel", "Department", "EducationField", "Gender",
  "JobRole", "MaritalStatus", "Over18", "OverTime",
  "Education", "EnvironmentSatisfaction", "JobInvolvement", "JobLevel",
  "JobSatisfaction", "PerformanceRating", "RelationshipSatisfaction",
  "StockOptionLevel", "WorkLifeBalance"
)
attrition[factorCols] <- lapply(attrition[factorCols], factor)

# Show the resulting column types.
str(attrition)
## 'data.frame': 870 obs. of 36 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Age : int 32 40 35 32 24 27 41 37 34 34 ...
## $ Attrition : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ BusinessTravel : Factor w/ 3 levels "Non-Travel","Travel_Frequently",..: 3 3 2 3 2 2 3 3 3 2 ...
## $ DailyRate : int 117 1308 200 801 567 294 1283 309 1333 653 ...
## $ Department : Factor w/ 3 levels "Human Resources",..: 3 2 2 3 2 2 2 3 3 2 ...
## $ DistanceFromHome : int 13 14 18 1 2 10 5 10 10 10 ...
## $ Education : Factor w/ 5 levels "1","2","3","4",..: 4 3 2 4 1 2 5 4 4 4 ...
## $ EducationField : Factor w/ 6 levels "Human Resources",..: 2 4 2 3 6 2 4 2 2 6 ...
## $ EmployeeCount : int 1 1 1 1 1 1 1 1 1 1 ...
## $ EmployeeNumber : int 859 1128 1412 2016 1646 733 1448 1105 1055 1597 ...
## $ EnvironmentSatisfaction : Factor w/ 4 levels "1","2","3","4": 2 3 3 3 1 4 2 4 3 4 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 2 2 2 1 1 2 2 1 1 2 ...
## $ HourlyRate : int 73 44 60 48 32 32 90 88 87 92 ...
## $ JobInvolvement : Factor w/ 4 levels "1","2","3","4": 3 2 3 3 3 3 4 2 3 2 ...
## $ JobLevel : Factor w/ 5 levels "1","2","3","4",..: 2 5 3 3 1 3 1 2 1 2 ...
## $ JobRole : Factor w/ 9 levels "Healthcare Representative",..: 8 6 5 8 7 5 7 8 9 1 ...
## $ JobSatisfaction : Factor w/ 4 levels "1","2","3","4": 4 3 4 4 4 1 3 4 3 3 ...
## $ MaritalStatus : Factor w/ 3 levels "Divorced","Married",..: 1 3 3 2 3 1 2 1 2 2 ...
## $ MonthlyIncome : int 4403 19626 9362 10422 3760 8793 2127 6694 2220 5063 ...
## $ MonthlyRate : int 9250 17544 19944 24032 17218 4809 5561 24223 18410 15332 ...
## $ NumCompaniesWorked : int 2 1 2 1 1 1 2 2 1 1 ...
## $ Over18 : Factor w/ 1 level "Y": 1 1 1 1 1 1 1 1 1 1 ...
## $ OverTime : Factor w/ 2 levels "No","Yes": 1 1 1 1 2 1 2 2 2 1 ...
## $ PercentSalaryHike : int 11 14 11 19 13 21 12 14 19 14 ...
## $ PerformanceRating : Factor w/ 2 levels "3","4": 1 1 1 1 1 2 1 1 1 1 ...
## $ RelationshipSatisfaction: Factor w/ 4 levels "1","2","3","4": 3 1 3 3 3 3 1 3 4 2 ...
## $ StandardHours : int 80 80 80 80 80 80 80 80 80 80 ...
## $ StockOptionLevel : Factor w/ 4 levels "0","1","2","3": 2 1 1 3 1 3 1 4 2 2 ...
## $ TotalWorkingYears : int 8 21 10 14 6 9 7 8 1 8 ...
## $ TrainingTimesLastYear : int 3 2 2 3 2 4 5 5 2 3 ...
## $ WorkLifeBalance : Factor w/ 4 levels "1","2","3","4": 2 4 3 3 3 2 2 3 3 2 ...
## $ YearsAtCompany : int 5 20 2 14 6 9 4 1 1 8 ...
## $ YearsInCurrentRole : int 2 7 2 10 3 7 2 0 1 2 ...
## $ YearsSinceLastPromotion : int 0 4 2 5 1 1 0 0 0 7 ...
## $ YearsWithCurrManager : int 3 9 2 7 3 7 3 0 0 7 ...
#Explore the data ##Selected Variables ###Age(2), DistanceFromHome(7), JobLevel(16), MonthlyIncome(20), NumCompaniesWorked(22), OverTime(24)
#Keep = Age, BusinessTravel, Department, DistanceFromHome (2,4,6,7)
# Pairwise plot matrix: Attrition against the first group of candidate columns.
attrition[c(
  "Attrition", "Age", "BusinessTravel",
  "DailyRate", "Department", "DistanceFromHome"
)] %>%
  ggpairs()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Keep = Education, EnvironmentSatisfaction (8,12)
# Pairwise plot matrix: Attrition against education, environment satisfaction,
# gender and hourly rate.
attrition[c(
  "Attrition", "Education", "EducationField",
  "EnvironmentSatisfaction", "Gender", "HourlyRate"
)] %>%
  ggpairs()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Keep = JobInvolvement, JobLevel, JobRole JobSatisfaction (15,16,17,18)
# Pairwise plot matrix: Attrition against the job-related factors and
# marital status.
attrition[c(
  "Attrition", "JobInvolvement", "JobLevel",
  "JobRole", "JobSatisfaction", "MaritalStatus"
)] %>%
  ggpairs()
#Keep = MonthlyIncome, NumCompaniesWorked, OverTime (20,22,24)
# Pairwise plot matrix: Attrition against income, rate, prior employers,
# Over18 and overtime.
attrition[c(
  "Attrition", "MonthlyIncome", "MonthlyRate",
  "NumCompaniesWorked", "Over18", "OverTime"
)] %>%
  ggpairs()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Keep = PerformanceRating, RelationshipSatisfaction, StockOptionLevel, TotalWorkingYears (26,27,29,30)
# Pairwise plot matrix: Attrition against salary hike, performance,
# relationship satisfaction, stock options and total working years.
attrition[c(
  "Attrition", "PercentSalaryHike", "PerformanceRating",
  "RelationshipSatisfaction", "StockOptionLevel", "TotalWorkingYears"
)] %>%
  ggpairs()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Keep = WorkLifeBalance, YearsAtCompany, YearsInCurrentRole (32,33,34)
# Pairwise plot matrix: Attrition against training, work-life balance and
# tenure columns.
attrition[c(
  "Attrition", "TrainingTimesLastYear", "WorkLifeBalance",
  "YearsAtCompany", "YearsInCurrentRole"
)] %>%
  ggpairs()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Keep = YearsSinceLastPromotion, YearsWithCurrManager (35,36)
# Pairwise plot matrix: Attrition against years since last promotion and
# years with the current manager.
attrition[c(
  "Attrition", "YearsSinceLastPromotion", "YearsWithCurrManager"
)] %>%
  ggpairs()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Run Naive Bayes with 22 selected variables ##Results ###Mean Accuracy = 83.25% ###Mean Sensitivity = 87.75% ###Mean Specificity = 61.03%
# Repeated hold-out evaluation of a Naive Bayes classifier for Attrition.
# Each iteration draws a random train/test split, fits the model on the
# selected predictor columns, predicts the held-out rows, and stores accuracy,
# sensitivity and specificity from the resulting confusion matrix.
set.seed(10) # fixed seed so the random splits are reproducible
iterations <- 10
masterAcc <- matrix(nrow = iterations)
masterSen <- matrix(nrow = iterations)
masterSpec <- matrix(nrow = iterations)
splitPerc <- .7 #Training / Test split Percentage

# Column positions of the 22 selected predictors. Defined once so the model is
# trained and evaluated on exactly the same columns.
predictorCols <- c(2, 4, 6, 7, 8, 12, 15, 16, 17, 18, 20, 22, 24, 26, 27, 29,
                   30, 32, 33, 34, 35, 36)

for (j in seq_len(iterations)) {
  trainIndices <- sample(
    seq_len(nrow(attrition)),
    round(splitPerc * nrow(attrition))
  )
  train <- attrition[trainIndices, ]
  test <- attrition[-trainIndices, ]

  # Fit on the selected predictors only. Previously the whole data frame
  # (including the response column itself and unused columns) was passed as the
  # predictor set, which made predict() warn about every numeric training
  # column missing from the new data.
  model <- naiveBayes(train[, predictorCols], train$Attrition, laplace = 1)

  # Rows of the table are predictions, columns are the observed classes.
  # NOTE(review): confusionMatrix() is called without `positive`, so caret
  # uses the first factor level as the positive class when computing
  # sensitivity/specificity -- confirm that is the intended class.
  CMB <- confusionMatrix(
    table(predict(model, test[, predictorCols]), test$Attrition)
  )
  masterAcc[j] <- CMB$overall["Accuracy"]
  masterSen[j] <- CMB$byClass["Sensitivity"]
  masterSpec[j] <- CMB$byClass["Specificity"]
}
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'ID'. Did you use factors
## with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'DailyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeCount'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'EmployeeNumber'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'HourlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'MonthlyRate'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'PercentSalaryHike'. Did you
## use factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'StandardHours'. Did you use
## factors with numeric labels for training, and numeric values for new data?
## Warning in predict.naiveBayes(model, test[, c(2, 4, 6, 7, 8, 12, 15, 16, : Type
## mismatch between training and new data for variable 'TrainingTimesLastYear'.
## Did you use factors with numeric labels for training, and numeric values for new
## data?
# Column-wise averages of the accuracy, sensitivity and specificity results
# collected earlier in the script (masterAcc / masterSen / masterSpec are
# populated before this point; presumably one row per resampling iteration --
# confirm against the loop that fills them).
MeanAcc <- colMeans(masterAcc)
MeanSen <- colMeans(masterSen)
MeanSpec <- colMeans(masterSpec)
#Results
# Auto-print each averaged metric.
MeanAcc
## [1] 0.832567
MeanSen
## [1] 0.8775601
MeanSpec
## [1] 0.6103106
#Use Test Set inside the Model
#Blank DataSet
# Load the competition set (it has no Attrition column) from a fixed local
# path; the interactive file picker is kept below as a commented-out alternative.
#attritionTest <- read.csv(file.choose())
attritionTest <- read.csv(
  file = "/Users/allenmiller/OneDrive - Southern Methodist University/Doing Data Science/CaseStudy2/CaseStudy2CompSet No Attrition.csv"
)
# Quick per-column overview of the freshly loaded data.
summary(attritionTest)
## ID Age BusinessTravel DailyRate
## Min. :1171 Min. :19.00 Length:300 Min. : 102.0
## 1st Qu.:1246 1st Qu.:31.00 Class :character 1st Qu.: 448.0
## Median :1320 Median :36.00 Mode :character Median : 775.0
## Mean :1320 Mean :37.86 Mean : 784.8
## 3rd Qu.:1395 3rd Qu.:44.00 3rd Qu.:1117.0
## Max. :1470 Max. :60.00 Max. :1490.0
## Department DistanceFromHome Education EducationField
## Length:300 Min. : 1.00 Min. :1.000 Length:300
## Class :character 1st Qu.: 2.00 1st Qu.:2.000 Class :character
## Mode :character Median : 7.00 Median :3.000 Mode :character
## Mean : 9.26 Mean :2.973
## 3rd Qu.:14.00 3rd Qu.:4.000
## Max. :29.00 Max. :5.000
## EmployeeCount EmployeeNumber EnvironmentSatisfaction Gender
## Min. :1 Min. : 2.0 Min. :1.000 Length:300
## 1st Qu.:1 1st Qu.: 508.8 1st Qu.:2.000 Class :character
## Median :1 Median : 994.5 Median :3.000 Mode :character
## Mean :1 Mean :1020.9 Mean :2.733
## 3rd Qu.:1 3rd Qu.:1542.5 3rd Qu.:4.000
## Max. :1 Max. :2065.0 Max. :4.000
## HourlyRate JobInvolvement JobLevel JobRole
## Min. : 30.00 Min. :1.000 Min. :1.0 Length:300
## 1st Qu.: 50.00 1st Qu.:2.000 1st Qu.:1.0 Class :character
## Median : 66.00 Median :3.000 Median :2.0 Mode :character
## Mean : 66.07 Mean :2.743 Mean :2.2
## 3rd Qu.: 83.00 3rd Qu.:3.000 3rd Qu.:3.0
## Max. :100.00 Max. :4.000 Max. :5.0
## JobSatisfaction MaritalStatus MonthlyIncome MonthlyRate
## Min. :1.000 Length:300 Min. : 1232 Min. : 2097
## 1st Qu.:2.000 Class :character 1st Qu.: 3034 1st Qu.: 8420
## Median :3.000 Mode :character Median : 5208 Median :15091
## Mean :2.767 Mean : 7103 Mean :14499
## 3rd Qu.:4.000 3rd Qu.: 9750 3rd Qu.:20330
## Max. :4.000 Max. :19973 Max. :26914
## NumCompaniesWorked Over18 OverTime PercentSalaryHike
## Min. :0.000 Length:300 Length:300 Min. :11.00
## 1st Qu.:1.000 Class :character Class :character 1st Qu.:12.00
## Median :2.000 Mode :character Mode :character Median :14.00
## Mean :2.547 Mean :15.17
## 3rd Qu.:4.000 3rd Qu.:18.00
## Max. :9.000 Max. :25.00
## PerformanceRating RelationshipSatisfaction StandardHours StockOptionLevel
## Min. :3.000 Min. :1.000 Min. :80 Min. :0.0000
## 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:80 1st Qu.:0.0000
## Median :3.000 Median :3.000 Median :80 Median :1.0000
## Mean :3.153 Mean :2.803 Mean :80 Mean :0.7833
## 3rd Qu.:3.000 3rd Qu.:4.000 3rd Qu.:80 3rd Qu.:1.0000
## Max. :4.000 Max. :4.000 Max. :80 Max. :3.0000
## TotalWorkingYears TrainingTimesLastYear WorkLifeBalance YearsAtCompany
## Min. : 0.00 Min. :0.000 Min. :1.000 Min. : 0.000
## 1st Qu.: 6.00 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 3.000
## Median :10.00 Median :2.000 Median :3.000 Median : 5.000
## Mean :12.44 Mean :2.683 Mean :2.747 Mean : 7.527
## 3rd Qu.:18.00 3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:10.000
## Max. :38.00 Max. :6.000 Max. :4.000 Max. :37.000
## YearsInCurrentRole YearsSinceLastPromotion YearsWithCurrManager
## Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 2.00 1st Qu.: 0.00 1st Qu.: 2.00
## Median : 3.00 Median : 1.00 Median : 3.00
## Mean : 4.33 Mean : 2.29 Mean : 4.38
## 3rd Qu.: 7.00 3rd Qu.: 3.00 3rd Qu.: 7.00
## Max. :18.00 Max. :15.00 Max. :17.00
#Change Test Data to Factors where applicable
# Convert the categorical text columns and the integer-coded rating/level
# columns of the test set to factors in one pass. All other columns are left
# exactly as read. local() keeps the helper variables out of the global
# environment.
attritionTest <- local({
  factorCols <- c(
    "BusinessTravel", "Department", "EducationField", "Gender", "JobRole",
    "MaritalStatus", "Over18", "OverTime", "Education",
    "EnvironmentSatisfaction", "JobInvolvement", "JobLevel",
    "JobSatisfaction", "PerformanceRating", "RelationshipSatisfaction",
    "StockOptionLevel", "WorkLifeBalance"
  )
  converted <- attritionTest
  converted[factorCols] <- lapply(converted[factorCols], factor)
  converted
})
# Show the resulting column types.
str(attritionTest)
## 'data.frame': 300 obs. of 35 variables:
## $ ID : int 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 ...
## $ Age : int 35 33 26 55 29 51 52 39 31 31 ...
## $ BusinessTravel : Factor w/ 3 levels "Non-Travel","Travel_Frequently",..: 3 3 3 3 3 2 1 3 3 2 ...
## $ DailyRate : int 750 147 1330 1311 1246 1456 585 1387 1062 534 ...
## $ Department : Factor w/ 3 levels "Human Resources",..: 2 1 2 2 3 2 3 2 2 2 ...
## $ DistanceFromHome : int 28 2 21 2 19 1 29 10 24 20 ...
## $ Education : Factor w/ 5 levels "1","2","3","4",..: 3 3 3 3 3 4 4 5 3 3 ...
## $ EducationField : Factor w/ 6 levels "Human Resources",..: 2 1 4 2 2 4 2 4 4 2 ...
## $ EmployeeCount : int 1 1 1 1 1 1 1 1 1 1 ...
## $ EmployeeNumber : int 1596 1207 1107 505 1497 145 2019 1618 1252 587 ...
## $ EnvironmentSatisfaction : Factor w/ 4 levels "1","2","3","4": 2 2 1 3 3 1 1 2 3 1 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 2 2 2 1 2 1 2 2 1 2 ...
## $ HourlyRate : int 46 99 37 97 77 30 40 76 96 66 ...
## $ JobInvolvement : Factor w/ 4 levels "1","2","3","4": 4 3 3 3 2 2 3 3 2 3 ...
## $ JobLevel : Factor w/ 5 levels "1","2","3","4",..: 2 1 1 4 2 3 1 2 2 3 ...
## $ JobRole : Factor w/ 9 levels "Healthcare Representative",..: 3 2 3 4 8 1 9 5 1 1 ...
## $ JobSatisfaction : Factor w/ 4 levels "1","2","3","4": 3 3 3 4 3 1 4 1 1 3 ...
## $ MaritalStatus : Factor w/ 3 levels "Divorced","Married",..: 2 2 1 3 1 3 1 2 3 2 ...
## $ MonthlyIncome : int 3407 3600 2377 16659 8620 7484 3482 5377 6812 9824 ...
## $ MonthlyRate : int 25348 8429 19373 23258 23757 25796 19788 3835 17198 22908 ...
## $ NumCompaniesWorked : int 1 1 1 2 1 3 2 2 1 3 ...
## $ Over18 : Factor w/ 1 level "Y": 1 1 1 1 1 1 1 1 1 1 ...
## $ OverTime : Factor w/ 2 levels "No","Yes": 1 1 1 2 1 1 1 1 1 1 ...
## $ PercentSalaryHike : int 17 13 20 13 14 20 15 13 19 12 ...
## $ PerformanceRating : Factor w/ 2 levels "3","4": 1 1 2 1 1 2 1 1 1 1 ...
## $ RelationshipSatisfaction: Factor w/ 4 levels "1","2","3","4": 4 4 3 3 3 3 2 4 2 1 ...
## $ StandardHours : int 80 80 80 80 80 80 80 80 80 80 ...
## $ StockOptionLevel : Factor w/ 4 levels "0","1","2","3": 3 2 2 1 3 1 3 4 1 1 ...
## $ TotalWorkingYears : int 10 5 1 30 10 23 16 10 10 12 ...
## $ TrainingTimesLastYear : int 3 2 0 2 3 1 3 3 2 2 ...
## $ WorkLifeBalance : Factor w/ 4 levels "1","2","3","4": 2 3 2 3 3 2 2 3 3 3 ...
## $ YearsAtCompany : int 10 5 1 5 10 13 9 7 10 1 ...
## $ YearsInCurrentRole : int 9 4 1 4 7 12 8 7 9 0 ...
## $ YearsSinceLastPromotion : int 6 1 0 1 0 12 0 7 1 0 ...
## $ YearsWithCurrManager : int 8 4 0 2 4 8 0 7 8 0 ...
#Run Model and Predict On Test Data
# Predictor columns, identified by their position in the training frame
# `attrition` and then carried around by NAME. The test frame has no Attrition
# column, so its positions are shifted relative to the training frame;
# selecting by name guarantees the model sees the same variables at prediction
# time. The previous positional test selection omitted Age, so Age was
# ignored for every prediction (predict.naiveBayes only emitted a
# "Type mismatch ... variable 'Age'" warning).
predictorNames <- names(attrition)[c(
  2, 4, 6, 7, 8, 12, 15, 16, 17, 18, 20, 22, 24, 26, 27, 29, 30, 32, 33, 34,
  35, 36
)]

# Fail fast instead of silently predicting with fewer variables.
missingPredictors <- setdiff(predictorNames, names(attritionTest))
if (length(missingPredictors) > 0) {
  stop(
    "Test data is missing predictor column(s): ",
    paste(missingPredictors, collapse = ", "),
    call. = FALSE
  )
}

# Naive Bayes fit on the full training data; laplace = 1 requests Laplace
# smoothing from e1071::naiveBayes.
model1 <- naiveBayes(attrition[, predictorNames], attrition$Attrition, laplace = 1)

# predict() already returns a factor of class labels, so no unlist() is needed.
attritionPredict <- predict(model1, attritionTest[, predictorNames])

# NOTE(review): the recorded console output further down (class counts and the
# ID/Attrition listing) was captured while Age was missing from the prediction
# frame; re-run the script to refresh it.
attritionTest$Attrition <- attritionPredict
summary(attritionTest)
## ID Age BusinessTravel DailyRate
## Min. :1171 Min. :19.00 Non-Travel : 32 Min. : 102.0
## 1st Qu.:1246 1st Qu.:31.00 Travel_Frequently: 57 1st Qu.: 448.0
## Median :1320 Median :36.00 Travel_Rarely :211 Median : 775.0
## Mean :1320 Mean :37.86 Mean : 784.8
## 3rd Qu.:1395 3rd Qu.:44.00 3rd Qu.:1117.0
## Max. :1470 Max. :60.00 Max. :1490.0
##
## Department DistanceFromHome Education
## Human Resources : 11 Min. : 1.00 1: 32
## Research & Development:209 1st Qu.: 2.00 2: 49
## Sales : 80 Median : 7.00 3:125
## Mean : 9.26 4: 83
## 3rd Qu.:14.00 5: 11
## Max. :29.00
##
## EducationField EmployeeCount EmployeeNumber EnvironmentSatisfaction
## Human Resources : 7 Min. :1 Min. : 2.0 1: 56
## Life Sciences :130 1st Qu.:1 1st Qu.: 508.8 2: 56
## Marketing : 27 Median :1 Median : 994.5 3:100
## Medical : 94 Mean :1 Mean :1020.9 4: 88
## Other : 12 3rd Qu.:1 3rd Qu.:1542.5
## Technical Degree: 30 Max. :1 Max. :2065.0
##
## Gender HourlyRate JobInvolvement JobLevel
## Female:105 Min. : 30.00 1: 20 1:106
## Male :195 1st Qu.: 50.00 2: 72 2:104
## Median : 66.00 3:173 3: 37
## Mean : 66.07 4: 35 4: 30
## 3rd Qu.: 83.00 5: 23
## Max. :100.00
##
## JobRole JobSatisfaction MaritalStatus MonthlyIncome
## Research Scientist :61 1:53 Divorced: 65 Min. : 1232
## Sales Executive :57 2:59 Married :128 1st Qu.: 3034
## Laboratory Technician :55 3:93 Single :107 Median : 5208
## Manufacturing Director :31 4:95 Mean : 7103
## Manager :30 3rd Qu.: 9750
## Healthcare Representative:29 Max. :19973
## (Other) :37
## MonthlyRate NumCompaniesWorked Over18 OverTime PercentSalaryHike
## Min. : 2097 Min. :0.000 Y:300 No :212 Min. :11.00
## 1st Qu.: 8420 1st Qu.:1.000 Yes: 88 1st Qu.:12.00
## Median :15091 Median :2.000 Median :14.00
## Mean :14499 Mean :2.547 Mean :15.17
## 3rd Qu.:20330 3rd Qu.:4.000 3rd Qu.:18.00
## Max. :26914 Max. :9.000 Max. :25.00
##
## PerformanceRating RelationshipSatisfaction StandardHours StockOptionLevel
## 3:254 1: 47 Min. :80 0:135
## 4: 46 2: 54 1st Qu.:80 1:112
## 3:110 Median :80 2: 36
## 4: 89 Mean :80 3: 17
## 3rd Qu.:80
## Max. :80
##
## TotalWorkingYears TrainingTimesLastYear WorkLifeBalance YearsAtCompany
## Min. : 0.00 Min. :0.000 1: 14 Min. : 0.000
## 1st Qu.: 6.00 1st Qu.:2.000 2: 77 1st Qu.: 3.000
## Median :10.00 Median :2.000 3:180 Median : 5.000
## Mean :12.44 Mean :2.683 4: 29 Mean : 7.527
## 3rd Qu.:18.00 3rd Qu.:3.000 3rd Qu.:10.000
## Max. :38.00 Max. :6.000 Max. :37.000
##
## YearsInCurrentRole YearsSinceLastPromotion YearsWithCurrManager Attrition
## Min. : 0.00 Min. : 0.00 Min. : 0.00 No :256
## 1st Qu.: 2.00 1st Qu.: 0.00 1st Qu.: 2.00 Yes: 44
## Median : 3.00 Median : 1.00 Median : 3.00
## Mean : 4.33 Mean : 2.29 Mean : 4.38
## 3rd Qu.: 7.00 3rd Qu.: 3.00 3rd Qu.: 7.00
## Max. :18.00 Max. :15.00 Max. :17.00
##
#Build the submission table: keep only the ID and Attrition columns, ordered by ID
attritionFinal <- attritionTest[, c("ID", "Attrition")]
attritionFinal <- attritionFinal[order(attritionFinal[["ID"]]), ]
#Print the table to confirm its contents and the sort order
attritionFinal
## ID Attrition
## 1 1171 No
## 2 1172 No
## 3 1173 No
## 4 1174 No
## 5 1175 No
## 6 1176 No
## 7 1177 No
## 8 1178 No
## 9 1179 No
## 10 1180 No
## 11 1181 Yes
## 12 1182 No
## 13 1183 No
## 14 1184 No
## 15 1185 No
## 16 1186 No
## 17 1187 No
## 18 1188 No
## 19 1189 No
## 20 1190 No
## 21 1191 Yes
## 22 1192 No
## 23 1193 No
## 24 1194 No
## 25 1195 No
## 26 1196 No
## 27 1197 No
## 28 1198 Yes
## 29 1199 No
## 30 1200 Yes
## 31 1201 Yes
## 32 1202 No
## 33 1203 No
## 34 1204 No
## 35 1205 Yes
## 36 1206 No
## 37 1207 No
## 38 1208 No
## 39 1209 No
## 40 1210 No
## 41 1211 No
## 42 1212 No
## 43 1213 No
## 44 1214 No
## 45 1215 Yes
## 46 1216 No
## 47 1217 No
## 48 1218 No
## 49 1219 No
## 50 1220 No
## 51 1221 No
## 52 1222 No
## 53 1223 No
## 54 1224 No
## 55 1225 No
## 56 1226 No
## 57 1227 No
## 58 1228 No
## 59 1229 No
## 60 1230 No
## 61 1231 No
## 62 1232 No
## 63 1233 Yes
## 64 1234 No
## 65 1235 No
## 66 1236 No
## 67 1237 No
## 68 1238 No
## 69 1239 Yes
## 70 1240 No
## 71 1241 No
## 72 1242 No
## 73 1243 No
## 74 1244 No
## 75 1245 No
## 76 1246 No
## 77 1247 No
## 78 1248 No
## 79 1249 No
## 80 1250 No
## 81 1251 No
## 82 1252 No
## 83 1253 No
## 84 1254 No
## 85 1255 No
## 86 1256 No
## 87 1257 No
## 88 1258 No
## 89 1259 No
## 90 1260 No
## 91 1261 No
## 92 1262 No
## 93 1263 No
## 94 1264 No
## 95 1265 No
## 96 1266 No
## 97 1267 No
## 98 1268 No
## 99 1269 No
## 100 1270 Yes
## 101 1271 No
## 102 1272 No
## 103 1273 No
## 104 1274 No
## 105 1275 Yes
## 106 1276 No
## 107 1277 No
## 108 1278 No
## 109 1279 Yes
## 110 1280 No
## 111 1281 No
## 112 1282 No
## 113 1283 No
## 114 1284 No
## 115 1285 No
## 116 1286 No
## 117 1287 No
## 118 1288 No
## 119 1289 Yes
## 120 1290 No
## 121 1291 No
## 122 1292 No
## 123 1293 No
## 124 1294 No
## 125 1295 No
## 126 1296 No
## 127 1297 Yes
## 128 1298 No
## 129 1299 No
## 130 1300 No
## 131 1301 No
## 132 1302 No
## 133 1303 No
## 134 1304 No
## 135 1305 No
## 136 1306 No
## 137 1307 No
## 138 1308 No
## 139 1309 Yes
## 140 1310 Yes
## 141 1311 No
## 142 1312 No
## 143 1313 No
## 144 1314 No
## 145 1315 No
## 146 1316 No
## 147 1317 Yes
## 148 1318 Yes
## 149 1319 No
## 150 1320 No
## 151 1321 No
## 152 1322 No
## 153 1323 No
## 154 1324 No
## 155 1325 Yes
## 156 1326 No
## 157 1327 Yes
## 158 1328 Yes
## 159 1329 No
## 160 1330 No
## 161 1331 No
## 162 1332 No
## 163 1333 No
## 164 1334 No
## 165 1335 No
## 166 1336 Yes
## 167 1337 No
## 168 1338 No
## 169 1339 No
## 170 1340 No
## 171 1341 Yes
## 172 1342 No
## 173 1343 No
## 174 1344 No
## 175 1345 No
## 176 1346 No
## 177 1347 No
## 178 1348 No
## 179 1349 No
## 180 1350 No
## 181 1351 Yes
## 182 1352 No
## 183 1353 No
## 184 1354 No
## 185 1355 No
## 186 1356 No
## 187 1357 No
## 188 1358 Yes
## 189 1359 No
## 190 1360 No
## 191 1361 No
## 192 1362 No
## 193 1363 No
## 194 1364 No
## 195 1365 Yes
## 196 1366 No
## 197 1367 Yes
## 198 1368 No
## 199 1369 No
## 200 1370 No
## 201 1371 No
## 202 1372 No
## 203 1373 No
## 204 1374 No
## 205 1375 No
## 206 1376 No
## 207 1377 No
## 208 1378 No
## 209 1379 No
## 210 1380 No
## 211 1381 No
## 212 1382 No
## 213 1383 No
## 214 1384 No
## 215 1385 No
## 216 1386 No
## 217 1387 No
## 218 1388 No
## 219 1389 No
## 220 1390 No
## 221 1391 No
## 222 1392 No
## 223 1393 No
## 224 1394 No
## 225 1395 No
## 226 1396 No
## 227 1397 No
## 228 1398 No
## 229 1399 Yes
## 230 1400 No
## 231 1401 Yes
## 232 1402 Yes
## 233 1403 No
## 234 1404 Yes
## 235 1405 No
## 236 1406 No
## 237 1407 No
## 238 1408 No
## 239 1409 No
## 240 1410 No
## 241 1411 No
## 242 1412 No
## 243 1413 No
## 244 1414 No
## 245 1415 No
## 246 1416 No
## 247 1417 Yes
## 248 1418 No
## 249 1419 No
## 250 1420 Yes
## 251 1421 Yes
## 252 1422 No
## 253 1423 No
## 254 1424 No
## 255 1425 No
## 256 1426 No
## 257 1427 No
## 258 1428 No
## 259 1429 No
## 260 1430 No
## 261 1431 No
## 262 1432 No
## 263 1433 No
## 264 1434 No
## 265 1435 No
## 266 1436 No
## 267 1437 No
## 268 1438 No
## 269 1439 Yes
## 270 1440 No
## 271 1441 No
## 272 1442 No
## 273 1443 No
## 274 1444 Yes
## 275 1445 Yes
## 276 1446 No
## 277 1447 No
## 278 1448 No
## 279 1449 No
## 280 1450 No
## 281 1451 No
## 282 1452 No
## 283 1453 No
## 284 1454 No
## 285 1455 Yes
## 286 1456 Yes
## 287 1457 No
## 288 1458 Yes
## 289 1459 Yes
## 290 1460 Yes
## 291 1461 No
## 292 1462 No
## 293 1463 No
## 294 1464 No
## 295 1465 Yes
## 296 1466 No
## 297 1467 No
## 298 1468 Yes
## 299 1469 No
## 300 1470 No
#Save to CSV
#write.csv(attritionFinal,"Case2PredictionsClassifyMiller.csv",row.names = FALSE)
#Further Data Analysis
#Boxplot of years at the company for each Job Role
##Managers have the highest median time at the company as well as the largest range of time
###Sales Reps typically spend the shortest amount of time at the company with the Median time well below 5 years and 75% of all employees spending less than 5 years at the company
ggplot(attrition, aes(x = JobRole, y = YearsAtCompany, color = JobRole)) +
  geom_boxplot() +
  labs(title = "Job Role vs Years at Company",
       x = "Job Role",
       y = "Years at Company",
       color = "Job Role")
#Boxplot of years at the company by Business Travel category, split by Marital Status
##Divorced employees who travel have a higher median time at the company
###Single Employees all have a median time at company around 5 regardless of the amount of business travel
####All employees who travel frequently have a higher median time at the company regardless of Marital Status
ggplot(attrition, aes(x = BusinessTravel, y = YearsAtCompany, color = MaritalStatus)) +
  geom_boxplot() +
  labs(title = "Business Travel vs Years at Company colored by Marital Status",
       x = "Business Travel",
       y = "Years At Company",
       color = "Marital Status")
#Scatter plot of Monthly Income against Age, one panel per Gender
##Seems to be some positive correlation between Monthly Income and Age though it does not appear to be linear
###The distribution of Monthly Income is very similar between Male and Female
ggplot(attrition, aes(x = Age, y = MonthlyIncome, color = Gender)) +
  geom_point() +
  facet_wrap(~ Gender) +
  labs(title = "How Age and Gender influence Monthly Income",
       x = "Age",
       y = "Monthly Income",
       color = "Gender")
#Look at Mean Monthly Income by Job Role
##Managers have the highest Mean Income while Sales Reps have the lowest Mean Income
###33% of the Job Roles have a Mean Income over 7,500
####A little more than 50% of the Job Roles have a Mean Income over 5,000
#####There are 4 Job Roles with a Mean Income of around 2,500 or less
#ddply returns one row per JobRole with the mean MonthlyIncome stored in MMI
AvgRoleIncome <- ddply(attrition, "JobRole", summarise, MMI = mean(MonthlyIncome))
#The bars are mapped to fill, so the legend title has to be set through fill;
#the previous color = "Job Role" label had no matching aesthetic and was never applied
ggplot(AvgRoleIncome, aes(x = JobRole, y = MMI, fill = JobRole)) +
  geom_bar(stat = "identity") +
  labs(title = "Mean Monthly Income per Job Role",
       x = "Job Role",
       y = "Mean Income (Monthly)",
       fill = "Job Role")
#Bar chart of Relationship Satisfaction ratings, one panel per Department
##Human Resources has the fewest responses, however there is little variation in total responses for rating 1,3, and 4
###Overall most of the company is satisfied (rating 3 or 4) with their managers
ggplot(attrition, aes(x = RelationshipSatisfaction, fill = Department)) +
  geom_bar() +
  facet_grid(. ~ Department) +
  labs(title = "Distribution of Relationship Satisfaction Rating of Manager by Department",
       x = "Satisfaction Rating",
       y = "Total Responses")
#Monthly Income Analysis: selecting the variables
#Pairs plot of MonthlyIncome against the first group of candidate predictors
#Keep = YearsAtCompany, JobLevel, Age, OverTime
ggpairs(dplyr::select(attrition, MonthlyIncome, YearsAtCompany, JobLevel, Age, OverTime))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Pairs plot of MonthlyIncome against DailyRate, Department, Education and EducationField
#Keep = Department, Education
ggpairs(dplyr::select(attrition, MonthlyIncome, DailyRate, Department, Education, EducationField))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Pairs plot of MonthlyIncome against Gender, HourlyRate, JobInvolvement and JobRole
#Keep = JobRole
ggpairs(dplyr::select(attrition, MonthlyIncome, Gender, HourlyRate, JobInvolvement, JobRole))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Pairs plot of MonthlyIncome against MonthlyRate, NumCompaniesWorked, PercentSalaryHike and PerformanceRating
#Keep = None
ggpairs(dplyr::select(attrition, MonthlyIncome, MonthlyRate, NumCompaniesWorked, PercentSalaryHike,
                      PerformanceRating))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Pairs plot of MonthlyIncome against StockOptionLevel, TotalWorkingYears, TrainingTimesLastYear and WorkLifeBalance
#Keep = TotalWorkingYears
ggpairs(dplyr::select(attrition, MonthlyIncome, StockOptionLevel, TotalWorkingYears, TrainingTimesLastYear,
                      WorkLifeBalance))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Pairs plot of MonthlyIncome against the remaining tenure variables
#Keep = YearsInCurrentRole, YearsSinceLastPromotion, YearsWithCurrManager
ggpairs(dplyr::select(attrition, MonthlyIncome, YearsInCurrentRole, YearsSinceLastPromotion,
                      YearsWithCurrManager))
#Training data frame for the income model: the response (MonthlyIncome), the row ID,
#and the 11 predictors kept from the pairs plots
custom.var <- dplyr::select(
  attrition,
  MonthlyIncome, ID, YearsAtCompany, JobLevel, Age, Department, Education, JobRole,
  OverTime, TotalWorkingYears, YearsInCurrentRole, YearsSinceLastPromotion, YearsWithCurrManager
)
#Fit and cross-validate the Monthly Income model (recorded RMSE = 1018.206)
#Seed fixed so the cross-validation folds are reproducible
set.seed(11)
#10-fold cross-validation
train.control <- trainControl(method = "cv", number = 10)
#Stepwise-AIC linear regression on every column of custom.var except ID
custom.model <- train(
  MonthlyIncome ~ . - ID,
  data = custom.var,
  method = "lmStepAIC",
  trControl = train.control,
  trace = FALSE
)
#Cross-validated performance metrics
custom.model[["results"]]
## parameter RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 none 1018.206 0.9496054 784.8506 109.9547 0.01565338 49.23103
#Show the terms and coefficients retained by the stepwise selection
custom.model[["finalModel"]]
##
## Call:
## lm(formula = .outcome ~ JobLevel2 + JobLevel3 + JobLevel4 + JobLevel5 +
## Education4 + Education5 + `JobRoleHuman Resources` + `JobRoleLaboratory Technician` +
## JobRoleManager + `JobRoleResearch Director` + `JobRoleResearch Scientist` +
## `JobRoleSales Representative` + TotalWorkingYears, data = dat)
##
## Coefficients:
## (Intercept) JobLevel2
## 3614.32 1700.77
## JobLevel3 JobLevel4
## 4955.01 8349.85
## JobLevel5 Education4
## 11057.37 121.41
## Education5 `JobRoleHuman Resources`
## -378.61 -1095.75
## `JobRoleLaboratory Technician` JobRoleManager
## -1245.81 3285.30
## `JobRoleResearch Director` `JobRoleResearch Scientist`
## 3477.39 -1043.67
## `JobRoleSales Representative` TotalWorkingYears
## -1275.89 44.03
#Coefficient table, residual summary and fit statistics for the final linear model
summary(custom.model[["finalModel"]])
##
## Call:
## lm(formula = .outcome ~ JobLevel2 + JobLevel3 + JobLevel4 + JobLevel5 +
## Education4 + Education5 + `JobRoleHuman Resources` + `JobRoleLaboratory Technician` +
## JobRoleManager + `JobRoleResearch Director` + `JobRoleResearch Scientist` +
## `JobRoleSales Representative` + TotalWorkingYears, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3170.2 -651.4 -75.3 615.8 4333.7
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 3614.318 145.811 24.788
## JobLevel2 1700.773 139.081 12.229
## JobLevel3 4955.011 186.778 26.529
## JobLevel4 8349.853 283.597 29.443
## JobLevel5 11057.374 331.836 33.322
## Education4 121.408 77.948 1.558
## Education5 -378.614 206.034 -1.838
## `JobRoleHuman Resources` -1095.745 230.275 -4.758
## `JobRoleLaboratory Technician` -1245.806 141.849 -8.783
## JobRoleManager 3285.305 216.335 15.186
## `JobRoleResearch Director` 3477.388 188.016 18.495
## `JobRoleResearch Scientist` -1043.671 146.185 -7.139
## `JobRoleSales Representative` -1275.892 193.812 -6.583
## TotalWorkingYears 44.030 7.748 5.683
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## JobLevel2 < 0.0000000000000002 ***
## JobLevel3 < 0.0000000000000002 ***
## JobLevel4 < 0.0000000000000002 ***
## JobLevel5 < 0.0000000000000002 ***
## Education4 0.1197
## Education5 0.0665 .
## `JobRoleHuman Resources` 0.0000022897176 ***
## `JobRoleLaboratory Technician` < 0.0000000000000002 ***
## JobRoleManager < 0.0000000000000002 ***
## `JobRoleResearch Director` < 0.0000000000000002 ***
## `JobRoleResearch Scientist` 0.0000000000020 ***
## `JobRoleSales Representative` 0.0000000000802 ***
## TotalWorkingYears 0.0000000181721 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1004 on 856 degrees of freedom
## Multiple R-squared: 0.953, Adjusted R-squared: 0.9523
## F-statistic: 1335 on 13 and 856 DF, p-value: < 0.00000000000000022
#Print the caret summary of the fit: sample size, resampling scheme and cross-validated metrics
print(custom.model)
## Linear Regression with Stepwise Selection
##
## 870 samples
## 12 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 783, 783, 783, 783, 782, 784, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 1018.206 0.9496054 784.8506
#Read the competition set that has no salary column; it is cleaned below to match the training data
#Interactive alternative:
#MonthlyIncomeTest <- read.csv(file.choose())
MonthlyIncomeTest <- read.csv(
  file = "/Users/allenmiller/OneDrive - Southern Methodist University/Doing Data Science/CaseStudy2/CaseStudy2CompSet No Salary.csv"
)
#Inspect the column types as read from disk
str(MonthlyIncomeTest)
## 'data.frame': 300 obs. of 35 variables:
## $ ID : int 871 872 873 874 875 876 877 878 879 880 ...
## $ Age : int 43 33 55 36 27 39 33 21 30 51 ...
## $ Attrition : chr "No" "No" "Yes" "No" ...
## $ BusinessTravel : chr "Travel_Frequently" "Travel_Rarely" "Travel_Rarely" "Non-Travel" ...
## $ DailyRate : int 1422 461 267 1351 1302 895 750 251 1312 1405 ...
## $ Department : chr "Sales" "Research & Development" "Sales" "Research & Development" ...
## $ DistanceFromHome : int 2 13 13 9 19 5 22 10 23 11 ...
## $ Education : int 4 1 4 4 3 3 2 2 3 2 ...
## $ EducationField : chr "Life Sciences" "Life Sciences" "Marketing" "Life Sciences" ...
## $ EmployeeCount : int 1 1 1 1 1 1 1 1 1 1 ...
## $ EmployeeNumber : int 1849 995 1372 1949 1619 42 160 1279 159 1367 ...
## $ EnvironmentSatisfaction : int 1 2 1 1 4 4 3 1 1 4 ...
## $ Gender : chr "Male" "Female" "Male" "Male" ...
## $ HourlyRate : int 92 53 85 66 67 56 95 45 96 82 ...
## $ JobInvolvement : int 3 3 4 4 2 3 3 2 1 2 ...
## $ JobLevel : int 2 1 4 1 1 2 2 1 1 4 ...
## $ JobRole : chr "Sales Executive" "Research Scientist" "Sales Executive" "Laboratory Technician" ...
## $ JobSatisfaction : int 4 4 3 2 1 4 2 3 3 2 ...
## $ MaritalStatus : chr "Married" "Single" "Single" "Married" ...
## $ MonthlyRate : int 19246 17241 9277 9238 16290 3335 15480 25308 22310 24439 ...
## $ NumCompaniesWorked : int 1 3 6 1 1 3 0 1 1 3 ...
## $ Over18 : chr "Y" "Y" "Y" "Y" ...
## $ OverTime : chr "No" "No" "Yes" "No" ...
## $ PercentSalaryHike : int 20 18 17 22 11 14 13 20 25 16 ...
## $ PerformanceRating : int 4 3 3 4 3 3 3 4 4 3 ...
## $ RelationshipSatisfaction: int 3 1 3 2 1 3 1 3 3 2 ...
## $ StandardHours : int 80 80 80 80 80 80 80 80 80 80 ...
## $ StockOptionLevel : int 1 0 0 0 2 1 1 0 3 0 ...
## $ TotalWorkingYears : int 7 5 24 5 7 19 8 2 10 29 ...
## $ TrainingTimesLastYear : int 5 4 2 3 3 6 2 2 2 1 ...
## $ WorkLifeBalance : int 3 3 2 3 3 4 4 1 2 2 ...
## $ YearsAtCompany : int 7 3 19 5 7 1 7 2 10 5 ...
## $ YearsInCurrentRole : int 7 2 7 4 7 0 7 2 7 2 ...
## $ YearsSinceLastPromotion : int 7 0 3 0 0 0 0 2 0 0 ...
## $ YearsWithCurrManager : int 7 2 8 2 7 0 7 2 9 3 ...
#Convert the character columns of the competition set to factors in a single pass
MonthlyIncomeTest <- dplyr::mutate(
  MonthlyIncomeTest,
  dplyr::across(
    c(Attrition, BusinessTravel, Department, EducationField, Gender,
      JobRole, MaritalStatus, Over18, OverTime),
    factor
  )
)
#Confirm the converted column types
str(MonthlyIncomeTest)
## 'data.frame': 300 obs. of 35 variables:
## $ ID : int 871 872 873 874 875 876 877 878 879 880 ...
## $ Age : int 43 33 55 36 27 39 33 21 30 51 ...
## $ Attrition : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 2 1 2 1 1 ...
## $ BusinessTravel : Factor w/ 3 levels "Non-Travel","Travel_Frequently",..: 2 3 3 1 3 3 1 2 2 3 ...
## $ DailyRate : int 1422 461 267 1351 1302 895 750 251 1312 1405 ...
## $ Department : Factor w/ 3 levels "Human Resources",..: 3 2 3 2 2 3 3 2 2 2 ...
## $ DistanceFromHome : int 2 13 13 9 19 5 22 10 23 11 ...
## $ Education : int 4 1 4 4 3 3 2 2 3 2 ...
## $ EducationField : Factor w/ 6 levels "Human Resources",..: 2 2 3 2 5 6 3 2 2 6 ...
## $ EmployeeCount : int 1 1 1 1 1 1 1 1 1 1 ...
## $ EmployeeNumber : int 1849 995 1372 1949 1619 42 160 1279 159 1367 ...
## $ EnvironmentSatisfaction : int 1 2 1 1 4 4 3 1 1 4 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 2 1 2 2 2 2 2 1 2 1 ...
## $ HourlyRate : int 92 53 85 66 67 56 95 45 96 82 ...
## $ JobInvolvement : int 3 3 4 4 2 3 3 2 1 2 ...
## $ JobLevel : int 2 1 4 1 1 2 2 1 1 4 ...
## $ JobRole : Factor w/ 9 levels "Healthcare Representative",..: 8 7 8 3 3 9 8 3 7 5 ...
## $ JobSatisfaction : int 4 4 3 2 1 4 2 3 3 2 ...
## $ MaritalStatus : Factor w/ 3 levels "Divorced","Married",..: 2 3 3 2 1 2 2 3 1 3 ...
## $ MonthlyRate : int 19246 17241 9277 9238 16290 3335 15480 25308 22310 24439 ...
## $ NumCompaniesWorked : int 1 3 6 1 1 3 0 1 1 3 ...
## $ Over18 : Factor w/ 1 level "Y": 1 1 1 1 1 1 1 1 1 1 ...
## $ OverTime : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 1 1 ...
## $ PercentSalaryHike : int 20 18 17 22 11 14 13 20 25 16 ...
## $ PerformanceRating : int 4 3 3 4 3 3 3 4 4 3 ...
## $ RelationshipSatisfaction: int 3 1 3 2 1 3 1 3 3 2 ...
## $ StandardHours : int 80 80 80 80 80 80 80 80 80 80 ...
## $ StockOptionLevel : int 1 0 0 0 2 1 1 0 3 0 ...
## $ TotalWorkingYears : int 7 5 24 5 7 19 8 2 10 29 ...
## $ TrainingTimesLastYear : int 5 4 2 3 3 6 2 2 2 1 ...
## $ WorkLifeBalance : int 3 3 2 3 3 4 4 1 2 2 ...
## $ YearsAtCompany : int 7 3 19 5 7 1 7 2 10 5 ...
## $ YearsInCurrentRole : int 7 2 7 4 7 0 7 2 7 2 ...
## $ YearsSinceLastPromotion : int 7 0 3 0 0 0 0 2 0 0 ...
## $ YearsWithCurrManager : int 7 2 8 2 7 0 7 2 9 3 ...
#Build the prediction set from the competition data (MonthlyIncomeTest), not from the training data.
#Keeps ID plus the 11 predictors in custom.var. The no-salary file has no MonthlyIncome column,
#so that column is not selected here; it is filled in with the predictions below.
#NOTE(review): the printed output recorded after this block was produced when the training data
#was used by mistake (its IDs start at 1, the competition IDs start at 871); regenerate it.
IncomeTestData <- MonthlyIncomeTest %>%
  dplyr::select(ID, YearsAtCompany, JobLevel, Age, Department, Education, JobRole,
                OverTime, TotalWorkingYears, YearsInCurrentRole, YearsSinceLastPromotion, YearsWithCurrManager)
#Re-encode every predictor that is a factor in the training data with the training levels.
#JobLevel and Education are read as integers in the competition file but enter the model as factors.
factor.cols <- intersect(names(IncomeTestData), names(Filter(is.factor, custom.var)))
IncomeTestData[factor.cols] <- lapply(factor.cols, function(col) {
  factor(IncomeTestData[[col]], levels = levels(custom.var[[col]]))
})
#Predict Monthly Income and collect ID + prediction in a data frame sorted by ID
IncomeTestData$MonthlyIncome <- predict(custom.model, IncomeTestData)
custom.result <- IncomeTestData %>% dplyr::select(ID, MonthlyIncome)
custom.result <- custom.result[order(custom.result$ID), ]
#Verify Order and View Predictions
custom.result
## ID MonthlyIncome
## 1 1 5788.739
## 2 2 19073.711
## 3 3 9009.629
## 4 4 9307.157
## 5 5 2834.827
## 6 6 8965.599
## 7 7 2500.243
## 8 8 5788.739
## 9 9 2503.864
## 10 10 5788.739
## 11 11 18881.627
## 12 12 2650.663
## 13 13 9317.839
## 14 14 2382.455
## 15 15 2958.873
## 16 16 5711.361
## 17 17 5788.739
## 18 18 5744.709
## 19 19 2956.235
## 20 20 5876.799
## 21 21 5887.481
## 22 22 5843.451
## 23 23 4130.971
## 24 24 5876.799
## 25 25 5579.271
## 26 26 19101.777
## 27 27 5755.391
## 28 28 5700.679
## 29 29 9351.187
## 30 30 2912.205
## 31 31 5711.361
## 32 32 2984.932
## 33 33 16482.316
## 34 34 2930.220
## 35 35 5788.739
## 36 36 2588.662
## 37 37 9229.779
## 38 38 5640.957
## 39 39 19663.486
## 40 40 2470.515
## 41 41 16878.586
## 42 42 4435.468
## 43 43 3117.022
## 44 44 19013.717
## 45 45 2614.677
## 46 46 2896.872
## 47 47 5755.391
## 48 48 9087.007
## 49 49 2338.425
## 50 50 2834.827
## 51 51 2922.887
## 52 52 19558.041
## 53 53 3010.947
## 54 54 4465.555
## 55 55 5920.829
## 56 56 2710.070
## 57 57 2878.857
## 58 58 2808.812
## 59 59 12828.576
## 60 60 2382.455
## 61 61 9273.809
## 62 62 9131.037
## 63 63 2470.515
## 64 64 2570.647
## 65 65 2412.542
## 66 66 2878.857
## 67 67 5141.339
## 68 68 2588.662
## 69 69 2790.797
## 70 70 3319.158
## 71 71 3099.007
## 72 72 5579.271
## 73 73 5667.331
## 74 74 2754.100
## 75 75 9009.629
## 76 76 4597.645
## 77 77 9009.629
## 78 78 16559.694
## 79 79 9582.019
## 80 80 2412.542
## 81 81 5975.541
## 82 82 6185.009
## 83 83 5876.799
## 84 84 4843.810
## 85 85 6229.039
## 86 86 13285.071
## 87 87 6317.099
## 88 88 2710.070
## 89 89 8965.599
## 90 90 19047.065
## 91 91 2412.542
## 92 92 9053.659
## 93 93 2676.722
## 94 94 5876.799
## 95 95 18881.627
## 96 96 6195.691
## 97 97 16262.166
## 98 98 6063.601
## 99 99 2544.632
## 100 100 2676.722
## 101 101 2632.692
## 102 102 9141.719
## 103 103 5700.679
## 104 104 2790.797
## 105 105 16982.610
## 106 106 2842.160
## 107 107 5755.391
## 108 108 19619.455
## 109 109 5887.481
## 110 110 2746.767
## 111 111 2702.737
## 112 112 16394.256
## 113 113 9483.277
## 114 114 6140.979
## 115 115 6096.949
## 116 116 9009.629
## 117 117 5656.649
## 118 118 4509.585
## 119 119 3143.037
## 120 120 6052.919
## 121 121 12487.017
## 122 122 2426.485
## 123 123 9361.869
## 124 124 5788.739
## 125 125 5755.391
## 126 126 5876.799
## 127 127 3010.947
## 128 128 5755.391
## 129 129 5152.020
## 130 130 13064.921
## 131 131 5755.391
## 132 132 5788.739
## 133 133 5744.709
## 134 134 5623.301
## 135 135 4586.963
## 136 136 5667.331
## 137 137 2412.542
## 138 138 9537.989
## 139 139 2720.752
## 140 140 9097.689
## 141 141 13417.161
## 142 142 2780.115
## 143 143 9141.719
## 144 144 5744.709
## 145 145 5975.541
## 146 146 5491.211
## 147 147 2922.887
## 148 148 5788.739
## 149 149 2632.692
## 150 150 5579.271
## 151 151 5832.769
## 152 152 5755.391
## 153 153 5887.481
## 154 154 5799.421
## 155 155 2412.542
## 156 156 5491.211
## 157 157 5975.541
## 158 158 12888.801
## 159 159 9361.869
## 160 160 12548.703
## 161 161 4509.585
## 162 162 9009.629
## 163 163 5700.679
## 164 164 4711.720
## 165 165 9097.689
## 166 166 2426.485
## 167 167 16751.778
## 168 168 15503.245
## 169 169 5063.960
## 170 170 3176.386
## 171 171 5376.777
## 172 172 5097.309
## 173 173 9802.169
## 174 174 8631.015
## 175 175 16218.136
## 176 176 2658.707
## 177 177 19013.717
## 178 178 2834.827
## 179 179 8965.599
## 180 180 5964.859
## 181 181 9229.779
## 182 182 5799.421
## 183 183 3363.188
## 184 184 8965.599
## 185 185 2720.752
## 186 186 2790.797
## 187 187 5623.301
## 188 188 16647.754
## 189 189 19381.921
## 190 190 5535.241
## 191 191 4755.750
## 192 192 9087.007
## 193 193 6151.661
## 194 194 5552.897
## 195 195 2790.797
## 196 196 2614.677
## 197 197 2930.220
## 198 198 13054.239
## 199 199 9747.457
## 200 200 2412.542
## 201 201 12932.831
## 202 202 4711.720
## 203 203 5931.511
## 204 204 3010.947
## 205 205 2602.605
## 206 206 2922.887
## 207 207 5623.301
## 208 208 6063.601
## 209 209 2922.887
## 210 210 9097.689
## 211 211 13010.209
## 212 212 18881.627
## 213 213 2746.767
## 214 214 5667.331
## 215 215 2912.205
## 216 216 2922.887
## 217 217 6107.631
## 218 218 16306.196
## 219 219 2790.797
## 220 220 2922.887
## 221 221 5579.271
## 222 222 5656.649
## 223 223 5623.301
## 224 224 2614.677
## 225 225 3010.947
## 226 226 2896.872
## 227 227 2412.542
## 228 228 2746.767
## 229 229 5491.211
## 230 230 13285.071
## 231 231 5755.391
## 232 232 2426.485
## 233 233 13064.921
## 234 234 6019.571
## 235 235 16350.226
## 236 236 9439.247
## 237 237 19013.717
## 238 238 4657.008
## 239 239 2782.753
## 240 240 12504.673
## 241 241 9009.629
## 242 242 18925.657
## 243 243 2834.827
## 244 244 2602.605
## 245 245 5755.391
## 246 246 2720.752
## 247 247 9175.067
## 248 248 2591.924
## 249 249 4799.780
## 250 250 15839.521
## 251 251 2412.542
## 252 252 5964.859
## 253 253 2614.677
## 254 254 2228.019
## 255 255 12751.197
## 256 256 2878.857
## 257 257 2426.485
## 258 258 5788.739
## 259 259 8965.599
## 260 260 2676.722
## 261 261 2470.515
## 262 262 3176.386
## 263 263 2720.752
## 264 264 2470.515
## 265 265 2632.692
## 266 266 2632.692
## 267 267 16146.039
## 268 268 2533.950
## 269 269 5711.361
## 270 270 5623.301
## 271 271 5876.799
## 272 272 4755.750
## 273 273 4435.468
## 274 274 5920.829
## 275 275 19855.569
## 276 276 19311.245
## 277 277 2658.707
## 278 278 2886.190
## 279 279 5579.271
## 280 280 5612.619
## 281 281 5755.391
## 282 282 3249.112
## 283 283 5843.451
## 284 284 5887.481
## 285 285 2826.783
## 286 286 4851.143
## 287 287 2614.677
## 288 288 2500.602
## 289 289 9009.629
## 290 290 2591.924
## 291 291 2842.160
## 292 292 4675.023
## 293 293 2878.857
## 294 294 12663.137
## 295 295 16174.106
## 296 296 2710.070
## 297 297 5755.391
## 298 298 4465.555
## 299 299 2746.767
## 300 300 2912.205
## 301 301 5755.391
## 302 302 8586.985
## 303 303 2922.887
## 304 304 4833.128
## 305 305 2878.857
## 306 306 2632.692
## 307 307 2720.752
## 308 308 2956.235
## 309 309 6185.009
## 310 310 5491.211
## 311 311 4789.098
## 312 312 12932.831
## 313 313 2878.857
## 314 314 5876.799
## 315 315 5876.799
## 316 316 5711.361
## 317 317 2724.014
## 318 318 5491.211
## 319 319 5788.739
## 320 320 2614.677
## 321 321 2588.662
## 322 322 2632.333
## 323 323 3132.356
## 324 324 5229.399
## 325 325 12888.801
## 326 326 2614.677
## 327 327 19161.771
## 328 328 4789.098
## 329 329 5152.020
## 330 330 16174.106
## 331 331 16262.166
## 332 332 9307.157
## 333 333 2834.827
## 334 334 8965.599
## 335 335 19399.305
## 336 336 5009.249
## 337 337 5535.241
## 338 338 5623.301
## 339 339 5656.649
## 340 340 2533.950
## 341 341 2956.235
## 342 342 2456.572
## 343 343 8965.599
## 344 344 2614.677
## 345 345 5788.739
## 346 346 13010.209
## 347 347 5535.241
## 348 348 5579.271
## 349 349 2834.827
## 350 350 13285.071
## 351 351 12839.257
## 352 352 5755.391
## 353 353 4975.900
## 354 354 5667.331
## 355 355 9582.019
## 356 356 5975.541
## 357 357 2790.797
## 358 358 2456.213
## 359 359 5711.361
## 360 360 3072.992
## 361 361 19575.425
## 362 362 2382.455
## 363 363 19409.987
## 364 364 19117.741
## 365 365 18969.687
## 366 366 13549.251
## 367 367 4729.735
## 368 368 5711.361
## 369 369 2746.767
## 370 370 4833.128
## 371 371 5491.211
## 372 372 9131.037
## 373 373 8380.243
## 374 374 2922.887
## 375 375 2426.485
## 376 376 16471.905
## 377 377 3282.460
## 378 378 6052.919
## 379 379 2834.827
## 380 380 2878.857
## 381 381 18695.097
## 382 382 2870.813
## 383 383 5975.541
## 384 384 2470.515
## 385 385 3484.596
## 386 386 12707.167
## 387 387 9009.629
## 388 388 5667.331
## 389 389 4909.117
## 390 390 2470.515
## 391 391 5579.271
## 392 392 5535.241
## 393 393 2746.767
## 394 394 9537.989
## 395 395 2922.887
## 396 396 2824.145
## 397 397 16586.339
## 398 398 2728.041
## 399 399 6008.889
## 400 400 9273.809
## 401 401 5799.421
## 402 402 5799.421
## 403 403 2798.130
## 404 404 9087.007
## 405 405 2834.827
## 406 406 16487.597
## 407 407 6008.889
## 408 408 2798.130
## 409 409 16735.814
## 410 410 16410.219
## 411 411 2782.753
## 412 412 9009.629
## 413 413 2922.887
## 414 414 5612.619
## 415 415 2470.515
## 416 416 5975.541
## 417 417 2896.872
## 418 418 2666.040
## 419 419 5931.511
## 420 420 12608.425
## 421 421 5535.241
## 422 422 4553.256
## 423 423 5964.859
## 424 424 2368.512
## 425 425 5535.241
## 426 426 9361.869
## 427 427 2614.677
## 428 428 2702.737
## 429 429 2544.632
## 430 430 6008.889
## 431 431 5535.241
## 432 432 2834.827
## 433 433 4949.885
## 434 434 2614.677
## 435 435 9175.067
## 436 436 5788.739
## 437 437 2912.205
## 438 438 2720.752
## 439 439 2470.515
## 440 440 5876.799
## 441 441 4711.720
## 442 442 2886.190
## 443 443 2710.070
## 444 444 2870.813
## 445 445 19327.209
## 446 446 3319.158
## 447 447 5107.990
## 448 448 5755.391
## 449 449 4130.971
## 450 450 5755.391
## 451 451 5755.391
## 452 452 2834.827
## 453 453 5755.391
## 454 454 5876.799
## 455 455 16295.514
## 456 456 19003.035
## 457 457 5876.799
## 458 458 5920.829
## 459 459 2790.797
## 460 460 4630.993
## 461 461 2720.752
## 462 462 3028.962
## 463 463 9273.809
## 464 464 5711.361
## 465 465 2834.827
## 466 466 2834.827
## 467 467 2768.044
## 468 468 2588.662
## 469 469 5623.301
## 470 470 5535.241
## 471 471 2956.235
## 472 472 2614.677
## 473 473 12916.636
## 474 474 2966.917
## 475 475 2544.632
## 476 476 6052.919
## 477 477 5832.769
## 478 478 5667.331
## 479 479 3018.280
## 480 480 12751.197
## 481 481 5579.271
## 482 482 3044.296
## 483 483 5097.309
## 484 484 3407.218
## 485 485 10451.938
## 486 486 5667.331
## 487 487 2382.455
## 488 488 2790.797
## 489 489 8336.213
## 490 490 5755.391
## 491 491 19239.149
## 492 492 16542.309
## 493 493 5667.331
## 494 494 9053.659
## 495 495 5876.799
## 496 496 16647.754
## 497 497 2922.887
## 498 498 2746.767
## 499 499 6063.601
## 500 500 5876.799
## 501 501 10055.668
## 502 502 2632.692
## 503 503 3174.996
## 504 504 9131.037
## 505 505 16707.748
## 506 506 4799.780
## 507 507 2382.455
## 508 508 9185.749
## 509 509 3032.224
## 510 510 19161.771
## 511 511 5755.391
## 512 512 9141.719
## 513 513 3010.947
## 514 514 12707.167
## 515 515 2470.515
## 516 516 5887.481
## 517 517 2382.455
## 518 518 2676.722
## 519 519 4509.585
## 520 520 2588.662
## 521 521 2956.235
## 522 522 2412.542
## 523 523 8626.409
## 524 524 19635.419
## 525 525 2878.857
## 526 526 9890.229
## 527 527 5711.361
## 528 528 5579.271
## 529 529 2834.827
## 530 530 3000.266
## 531 531 9449.929
## 532 532 6096.949
## 533 533 3010.947
## 534 534 5832.769
## 535 535 2798.130
## 536 536 2878.857
## 537 537 9009.629
## 538 538 2694.693
## 539 539 2602.605
## 540 540 5832.769
## 541 541 4630.993
## 542 542 3308.476
## 543 543 2614.677
## 544 544 6052.919
## 545 545 2956.235
## 546 546 3010.947
## 547 547 8600.393
## 548 548 4630.993
## 549 549 12883.288
## 550 550 2904.161
## 551 551 18925.657
## 552 552 2782.753
## 553 553 4659.646
## 554 554 3264.446
## 555 555 5156.627
## 556 556 6063.601
## 557 557 5027.263
## 558 558 2834.827
## 559 559 5755.391
## 560 560 5667.331
## 561 561 2940.902
## 562 562 16575.657
## 563 563 2666.040
## 564 564 12619.107
## 565 565 2676.722
## 566 566 2834.827
## 567 567 4465.555
## 568 568 5755.391
## 569 569 5755.391
## 570 570 12932.831
## 571 571 5931.511
## 572 572 2614.677
## 573 573 5623.301
## 574 574 5755.391
## 575 575 5579.271
## 576 576 5744.709
## 577 577 3219.026
## 578 578 2676.722
## 579 579 5755.391
## 580 580 5579.271
## 581 581 2676.722
## 582 582 15767.425
## 583 583 5975.541
## 584 584 5788.739
## 585 585 5832.769
## 586 586 4729.735
## 587 587 5019.930
## 588 588 2900.134
## 589 589 2720.752
## 590 590 2666.040
## 591 591 2746.767
## 592 592 6140.979
## 593 593 2558.575
## 594 594 2930.220
## 595 595 9361.869
## 596 596 5711.361
## 597 597 9273.809
## 598 598 4949.885
## 599 599 5579.271
## 600 600 2834.827
## 601 601 2382.455
## 602 602 4789.098
## 603 603 12442.987
## 604 604 2808.812
## 605 605 2426.485
## 606 606 16850.519
## 607 607 6195.691
## 608 608 5667.331
## 609 609 2720.752
## 610 610 2470.515
## 611 611 4509.585
## 612 612 5876.799
## 613 613 2940.902
## 614 614 2382.455
## 615 615 9009.629
## 616 616 4729.735
## 617 617 2842.160
## 618 618 5332.747
## 619 619 8965.599
## 620 620 2412.542
## 621 621 2412.542
## 622 622 6317.099
## 623 623 3132.356
## 624 624 7991.262
## 625 625 2720.752
## 626 626 2790.797
## 627 627 5535.241
## 628 628 12575.077
## 629 629 9582.019
## 630 630 8965.599
## 631 631 3528.626
## 632 632 9185.749
## 633 633 6273.069
## 634 634 2676.722
## 635 635 9009.629
## 636 636 2577.980
## 637 637 16262.166
## 638 638 9087.007
## 639 639 2834.827
## 640 640 2558.575
## 641 641 2412.542
## 642 642 5975.541
## 643 643 16614.406
## 644 644 2912.205
## 645 645 2790.797
## 646 646 9351.187
## 647 647 2368.512
## 648 648 2622.010
## 649 649 6140.979
## 650 650 2808.812
## 651 651 2500.602
## 652 652 5964.859
## 653 653 13048.726
## 654 654 12515.084
## 655 655 2544.632
## 656 656 5535.241
## 657 657 12872.606
## 658 658 16515.664
## 659 659 5667.331
## 660 660 5579.271
## 661 661 4630.993
## 662 662 3308.476
## 663 663 9141.719
## 664 664 2500.602
## 665 665 5876.799
## 666 666 5755.391
## 667 667 9053.659
## 668 668 2720.752
## 669 669 4509.585
## 670 670 17026.640
## 671 671 12872.606
## 672 672 5700.679
## 673 673 5535.241
## 674 674 3132.356
## 675 675 2790.797
## 676 676 19101.777
## 677 677 5755.391
## 678 678 4509.585
## 679 679 6107.631
## 680 680 3032.224
## 681 681 5535.241
## 682 682 2720.752
## 683 683 5876.799
## 684 684 9097.689
## 685 685 2790.797
## 686 686 9141.719
## 687 687 8965.599
## 688 688 3000.266
## 689 689 2412.542
## 690 690 3090.963
## 691 691 4781.054
## 692 692 12416.342
## 693 693 6052.919
## 694 694 2702.737
## 695 695 5700.679
## 696 696 2591.924
## 697 697 15883.552
## 698 698 4773.765
## 699 699 2338.425
## 700 700 19249.831
## 701 701 2562.603
## 702 702 5579.271
## 703 703 9273.809
## 704 704 2782.753
## 705 705 2826.783
## 706 706 6019.571
## 707 707 5009.249
## 708 708 2870.813
## 709 709 2588.662
## 710 710 16735.814
## 711 711 12372.312
## 712 712 16658.436
## 713 713 2676.722
## 714 714 9395.217
## 715 715 9131.037
## 716 716 2562.603
## 717 717 5755.391
## 718 718 4711.720
## 719 719 6008.889
## 720 720 9009.629
## 721 721 9009.629
## 722 722 5711.361
## 723 723 3132.356
## 724 724 8851.165
## 725 725 2720.752
## 726 726 5535.241
## 727 727 2778.726
## 728 728 2912.205
## 729 729 2412.542
## 730 730 2500.602
## 731 731 5491.211
## 732 732 5843.451
## 733 733 2562.603
## 734 734 5700.679
## 735 735 2736.085
## 736 736 5711.361
## 737 737 12888.801
## 738 738 5887.481
## 739 739 5535.241
## 740 740 4465.555
## 741 741 5579.271
## 742 742 12294.934
## 743 743 4711.720
## 744 744 2500.602
## 745 745 5579.271
## 746 746 2456.572
## 747 747 2694.693
## 748 748 5931.511
## 749 749 2632.692
## 750 750 2746.767
## 751 751 2426.485
## 752 752 2470.515
## 753 753 12994.517
## 754 754 9185.749
## 755 755 12872.606
## 756 756 2764.782
## 757 757 5755.391
## 758 758 6317.099
## 759 759 2676.722
## 760 760 19101.777
## 761 761 2702.737
## 762 762 16394.256
## 763 763 6151.661
## 764 764 6151.661
## 765 765 2694.693
## 766 766 4130.971
## 767 767 3010.947
## 768 768 9449.929
## 769 769 5964.859
## 770 770 6096.949
## 771 771 5711.361
## 772 772 9626.049
## 773 773 9571.337
## 774 774 2658.707
## 775 775 9626.049
## 776 776 2658.707
## 777 777 2782.753
## 778 778 2382.455
## 779 779 2676.722
## 780 780 2868.175
## 781 781 2514.545
## 782 782 6317.099
## 783 783 2826.783
## 784 784 5755.391
## 785 785 5612.619
## 786 786 15459.215
## 787 787 12515.084
## 788 788 4877.158
## 789 789 19646.101
## 790 790 2658.707
## 791 791 4509.585
## 792 792 2966.917
## 793 793 9009.629
## 794 794 9317.839
## 795 795 5744.709
## 796 796 3010.947
## 797 797 12888.801
## 798 798 2412.542
## 799 799 2614.677
## 800 800 5887.481
## 801 801 5755.391
## 802 802 5755.391
## 803 803 16174.106
## 804 804 8965.599
## 805 805 4817.795
## 806 806 9229.779
## 807 807 5579.271
## 808 808 2658.707
## 809 809 3010.947
## 810 810 2834.827
## 811 811 4597.645
## 812 812 2676.722
## 813 813 5656.649
## 814 814 19267.215
## 815 815 2470.515
## 816 816 3231.098
## 817 817 5843.451
## 818 818 12663.137
## 819 819 2632.692
## 820 820 5579.271
## 821 821 2956.235
## 822 822 5755.391
## 823 823 5667.331
## 824 824 2456.572
## 825 825 5744.709
## 826 826 19283.179
## 827 827 9131.037
## 828 828 5491.211
## 829 829 2764.782
## 830 830 19365.957
## 831 831 4465.555
## 832 832 2338.425
## 833 833 2754.100
## 834 834 2614.677
## 835 835 13461.191
## 836 836 9087.007
## 837 837 2922.887
## 838 838 5876.799
## 839 839 9802.169
## 840 840 19425.951
## 841 841 2368.512
## 842 842 7913.884
## 843 843 13450.509
## 844 844 9009.629
## 845 845 5579.271
## 846 846 4509.585
## 847 847 2412.542
## 848 848 9405.899
## 849 849 5667.331
## 850 850 2720.752
## 851 851 5491.211
## 852 852 2808.812
## 853 853 4509.585
## 854 854 5711.361
## 855 855 5491.211
## 856 856 2382.455
## 857 857 2922.887
## 858 858 9009.629
## 859 859 2798.130
## 860 860 9009.629
## 861 861 2720.752
## 862 862 9703.427
## 863 863 5711.361
## 864 864 13197.011
## 865 865 9009.629
## 866 866 9053.659
## 867 867 5612.619
## 868 868 5799.421
## 869 869 2878.857
## 870 870 4509.585
#Save CSV File
#write.csv(custom.result,"Case2PredictionsRegressMiller.csv",row.names = FALSE)